********************************************************************************
*** Chapter 3: Basic summary of missingness in Master data (Sample Means section)
***
*** Created: 8-17-23
*** Modified: 6-18-24
***
********************************************************************************

* Session setup.
* Stata refuses to change maxvar while a dataset is in memory, so any data left
* over from a previous run is dropped first; otherwise re-running this file in
* the same session stops at -set maxvar-. Nothing is lost by this: the first
* -use- below already specifies -clear-.
clear
clear matrix
clear mata
set maxvar 10000

* All relative paths below are resolved from this project directory.
cd "C:\Users\williamslaro\Dropbox\partyBrands\"

********************************************************************************
*** Identify the analysis sample
********************************************************************************

*** Build the list of (cmp, year, survey) key combinations that occur in the
*** analysis sample. Only the three key variables are kept, and repeated
*** combinations are dropped so the list can be the unique side of an m:1 merge.
use "elementsText\data\analyticalSample.dta", clear

keep cmp year survey
duplicates drop

tempfile analysis
save `analysis', replace

*** Match every row of the Party Brands master data against that key list.
*** -merge m:1- replaces the pre-Stata-11 syntax (merge varlist using file),
*** which required both files to be sorted and paired rows sequentially when
*** a key was repeated on both sides. The m:1 form does not need a prior sort.
use "Data\Clean Data\MASTER.dta", clear

di _N
merge m:1 cmp year survey using `analysis'
tab _merge

*** _merge == 1 retains the master rows whose (cmp, year, survey) key does NOT
*** appear in the analysis key list; matched rows and using-only keys are
*** dropped. This filter is unchanged from the original script.
*** NOTE(review): the section title says "Identify the analysis sample". If the
*** intent is to keep the master rows that belong to the analysis sample, this
*** condition should be _merge == 3. Confirm before relying on Tables 3-4.
keep if _merge == 1
drop _merge

********************************************************************************
*** Generate some variables to measure missingness across categories
********************************************************************************

* Age indicator: five bands (up to 30, 31-40, 41-50, 51-64, 65 and over).
* Values matching no rule (e.g. a non-integer age between two bands) are left
* unchanged by -recode-.
* NOTE(review): if age stores numeric non-response codes above 65 they would
* land in band 5 -- confirm that age contains none.
gen age5 = age
recode age5 (min/30=1) (31/40=2) (41/50=3) (51/64=4) (65/max=5)

* Left-right self-placement indicator:
*   1 = lrs is in 0-99, 0 = lrs is in 987-999, missing = lrs is 666, 977 or 986.
* NOTE(review): the lrparty recode further down in this file sets 987 (not 986)
* to missing, whereas here 987 falls in the 987/999 range and is coded 0.
* Confirm that 986 is the intended code for lrs.
gen placeself = lrs
recode placeself (0/99=1) (666 977 986=.) (987/999=0)

* Partisanship
* The three indicators partition respondents by pid relative to the party in
* the row (cmp):
*   pid_none  : pid is 77777 or 88888 (coded as non-partisan below)
*   pid_na    : pid is missing (system or extended) or one of 99995-99999;
*               these rows stay missing on all three indicators
*   pid_other : pid is any remaining value different from cmp
* missing(pid) is used instead of listing "." so that extended missing values
* (.a-.z) are not counted as identifying with another party.
local pid_none  "inlist(pid, 77777, 88888)"
local pid_na    "(missing(pid) | inlist(pid, 99995, 99996, 99997, 99998, 99999))"
local pid_other "(pid != cmp & !`pid_none' & !`pid_na')"

gen inpartisan = 1 if pid == cmp
replace inpartisan = 0 if missing(inpartisan) & (`pid_other' | `pid_none')

gen outpartisan = 1 if `pid_other'
replace outpartisan = 0 if missing(outpartisan) & (pid == cmp | `pid_none')

gen nonpartisan = 1 if `pid_none'
replace nonpartisan = 0 if missing(nonpartisan) & (pid == cmp | `pid_other')

* Vote choice
* 1 = the vote variable equals cmp, 0 = a different non-missing value,
* missing otherwise. missing() covers "." and every extended missing value,
* not only "." and ".a".
gen vote = 1 if (voteint == cmp)
replace vote = 0 if missing(vote) & (voteint != cmp) & !missing(voteint)

gen vote_tm1 = 1 if (lastvote == cmp)
replace vote_tm1 = 0 if missing(vote_tm1) & (lastvote != cmp) & !missing(lastvote)

* Missingness
* Collapse every placement in 0-99 into the single category 1 and set code 987
* to missing, then attach labels so the remaining codes read as categories.
* This overwrites lrparty in memory; the data file on disk is not saved here.
recode lrparty (0/99=1) (987=.)

label define lrparty                 ///
	1   "Not missing"                ///
	666 "No CMP code"                ///
	977 "Not applicable"             ///
	988 "Don't know L-R"             ///
	989 "Haven't heard of party"     ///
	998 "Refused/No answer"          ///
	999 "Don't know"
label values lrparty lrparty

* miss: 0 = placement given, 1 = any code in 988-999, missing = codes 666/977
* (and 987, already set to missing above). Any other value of lrparty is
* carried over unchanged.
gen miss = lrparty
replace miss = 0 if lrparty == 1
replace miss = . if inlist(lrparty, 666, 977)
replace miss = 1 if inrange(lrparty, 988, 999)

********************************************************************************
*** Missingness of Left-Right Party Placement
********************************************************************************

*** Table 3: Frequency and Percentage of Different Categories of Missing Values in the Party Perceptions Dataset
*** Rows coded 977 are excluded; -tabulate- also leaves out rows where lrparty is missing.
tabulate lrparty if lrparty != 977

*** Table 4: Average Percentage of Missingness Across Relevant Groups
*** For each grouping variable, summarize miss within every level of that
*** variable; the mean of the 0/1 indicator is the share of missing placements.
*** NOTE(review): vote is generated earlier in this file but is not in this
*** list (only vote_tm1 is) -- confirm that is intended.
local groupvars age5 education income female inpartisan outpartisan nonpartisan vote_tm1 placeself
foreach grp of varlist `groupvars' {
	bysort `grp': summarize miss
}

